library(tidyr) 
## Warning: package 'tidyr' was built under R version 3.6.2
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.5     ✓ dplyr   1.0.4
## ✓ tibble  3.0.6     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.1
## ✓ purrr   0.3.4
## Warning: package 'ggplot2' was built under R version 3.6.2
## Warning: package 'tibble' was built under R version 3.6.2
## Warning: package 'readr' was built under R version 3.6.2
## Warning: package 'purrr' was built under R version 3.6.2
## Warning: package 'dplyr' was built under R version 3.6.2
## Warning: package 'forcats' was built under R version 3.6.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(dplyr) 
library(purrr)
library(ggplot2) 
library(plotly)
## Warning: package 'plotly' was built under R version 3.6.2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(corrplot)
## corrplot 0.90 loaded
library(RColorBrewer)
library(lubridate)
## Warning: package 'lubridate' was built under R version 3.6.2
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(DataExplorer)
## Warning: package 'DataExplorer' was built under R version 3.6.2
# Load the cleaned review data ----
# The file is read through an explicit path instead of calling setwd(), so
# running the script no longer changes the session's working directory.
data_dir <- "/Users/britney/Desktop/STA 395/video-game-reviews/clean_data"
# stringsAsFactors = TRUE pins the pre-R-4.0 default of read.csv(); the
# per-level counts printed by summary(games$platform) further down require
# platform to be a factor.
games <- read.csv(
  file.path(data_dir, "clean_data_all.csv"),
  stringsAsFactors = TRUE
)
#summary(games)
# Overview plot of the loaded data frame (plot_intro() is from DataExplorer)
plot_intro(games)

# Popular genres ----
# Derive the release year: release_date is converted to text first, then
# lubridate::year() extracts the calendar year.
games$year_released <- year(as.character(games$release_date))
genres <- games %>%
  select(year_released, starts_with("genres_"))

# Total of every genre column over all years, sorted in descending order.
# The columns are named explicitly instead of renaming the auto-generated
# data.frame() column name, which breaks as soon as the expression changes.
sort_genres <- genres %>%
  select(-year_released) %>%
  colSums() %>%
  data.frame(count = .) %>%
  rownames_to_column("genre_type") %>%
  arrange(desc(count))

# Keep the (up to) 20 most frequent genres; head() does not pad with NA rows
# when fewer than 20 genre columns exist, unlike sort_genres[1:20, ].
top20_genres <- head(sort_genres, 20)

# Sum the top genres within each release year. all_of() makes it explicit
# that the column names come from an external character vector.
top20_genres_by_year <- genres %>%
  select(year_released, all_of(top20_genres$genre_type)) %>%
  group_by(year_released) %>%
  summarise(across(everything(), sum))

# Long format for plotly: one row per year/genre pair. Every column except
# year_released is reshaped, so no column positions are hard-coded.
top20_genres_by_year_tidy_count <- top20_genres_by_year %>%
  pivot_longer(
    cols = -year_released,
    names_to = "genre_type",
    values_to = "count"
  ) %>%
  mutate(genre_type = gsub("genres_", "", genre_type)) # clean type name

# Interactive bar chart (by count), animated with one frame per release year
visual_top20_by_count <- plot_ly(
  top20_genres_by_year_tidy_count,
  type = "bar",
  x = ~fct_reorder(genre_type, count),
  y = ~count,
  frame = ~year_released,
  showlegend = FALSE
)
visual_top20_by_count
# Top 20 genres by percent ----
# Number of games released in each year (denominator for the percentages)
total_games_by_year <- count(games, year_released)

# Each genre's per-year sum divided by that year's game count, times 100,
# then reshaped to one row per year/genre pair for plotly.
top20_genres_by_year_tidy_percent <- top20_genres_by_year %>%
  left_join(total_games_by_year, by = "year_released") %>%
  mutate(across(starts_with("genres_"), ~ .x / n * 100)) %>%
  select(-n) %>%
  gather(key = genre_type, value = percent, starts_with("genres_")) %>%
  mutate(genre_type = gsub("genres_", "", genre_type)) # clean type name

# Interactive bar chart (by percent), animated with one frame per release year
visual_top20_by_percent <- plot_ly(
  top20_genres_by_year_tidy_percent,
  type = "bar",
  x = ~fct_reorder(genre_type, percent),
  y = ~percent,
  frame = ~year_released,
  showlegend = FALSE
)
visual_top20_by_percent
# ESRB rating trends across years ----
# Games per (year, rating) pair; rows containing NA are dropped
ESRB <- games %>%
  count(year_released, esrb_ratings) %>%
  na.omit()

# Total number of counted games per year
ESRB_by_year <- ESRB %>%
  group_by(year_released) %>%
  summarise(total_by_year = sum(n))

# Share of each rating within its year. Despite the column name, `percent`
# holds a fraction of the yearly total (n / total_by_year), not a 0-100 value.
ESRB <- ESRB %>%
  left_join(ESRB_by_year, by = "year_released") %>%
  mutate(percent = n / total_by_year) %>%
  select(-n, -total_by_year)

# Animated pie chart with one frame per release year
plot_ly(
  ESRB,
  type = "pie",
  labels = ~esrb_ratings,
  values = ~percent,
  frame = ~year_released,
  textinfo = "label+percent"
)
# Trends of ESRB content descriptions over the years ----
esrb_content <- games %>%
  select(year_released, starts_with("esrb_descs_"))

# Sum every descriptor column within each release year
esrb_content_by_year <- esrb_content %>%
  group_by(year_released) %>%
  summarise(across(everything(), sum))

# Long format for plotly: one row per year/descriptor pair. Every column
# except year_released is reshaped, so the code keeps working if the number of
# esrb_descs_ columns changes (the previous 2:8 range was position-dependent).
esrb_content_by_year_tidy_count <- esrb_content_by_year %>%
  pivot_longer(
    cols = -year_released,
    names_to = "esrb_content",
    values_to = "count"
  ) %>%
  mutate(esrb_content = gsub("esrb_descs_", "", esrb_content)) %>% # clean name
  dplyr::filter(esrb_content != "missing") # drop the "missing" descriptor

# Interactive bar chart (by count), animated with one frame per release year
visual_esrb_content_by_count <- plot_ly(
  esrb_content_by_year_tidy_count,
  type = "bar",
  x = ~fct_reorder(esrb_content, count),
  y = ~count,
  frame = ~year_released,
  showlegend = FALSE
)
visual_esrb_content_by_count
# Note: data for ESRB content descriptions is missing from 2015-2021

# ESRB content descriptions by percent ----
# Number of games released in each year (denominator for the percentages)
total_games_by_year <- count(games, year_released)

# Each descriptor's per-year sum divided by that year's game count, times 100,
# then reshaped to one row per year/descriptor pair for plotly.
# NOTE(review): unlike the count version, a "missing" descriptor is not
# dropped here -- confirm whether that is intentional.
esrb_content_by_year_tidy_percent <- esrb_content_by_year %>%
  left_join(total_games_by_year, by = "year_released") %>%
  mutate(across(starts_with("esrb_descs_"), ~ .x / n * 100)) %>%
  select(-n) %>%
  gather(key = esrb_content, value = percent, starts_with("esrb_descs_")) %>%
  mutate(esrb_content = gsub("esrb_descs_", "", esrb_content)) # clean name

# Interactive bar chart (by percent), animated with one frame per release year
visual_esrb_content_by_percent <- plot_ly(
  esrb_content_by_year_tidy_percent,
  type = "bar",
  x = ~fct_reorder(esrb_content, percent),
  y = ~percent,
  frame = ~year_released,
  showlegend = FALSE
)
visual_esrb_content_by_percent
# Platform trends across years ----
# Number of games per platform (recorded output kept below for reference)
summary(games$platform)
##              3DS        Dreamcast               DS Game Boy Advance 
##              369              118              554              331 
##         GameCube      Nintendo 64               PC      PlayStation 
##              400               70             4543              160 
##    PlayStation 2    PlayStation 3    PlayStation 4    PlayStation 5 
##             1243             1190             1910              124 
## PlayStation Vita              PSP           Stadia           Switch 
##              242              443                5             1220 
##              Wii            Wii U             Xbox         Xbox 360 
##              582              178              648             1511 
##         Xbox One    Xbox Series X 
##             1036               86

# Games per (year, platform) pair; rows containing NA are dropped
platform <- games %>%
  count(year_released, platform) %>%
  na.omit()

# Total number of counted games per year
platform_by_year <- platform %>%
  group_by(year_released) %>%
  summarise(total_by_year = sum(n))

# Share of each platform within its year. Despite the column name, `percent`
# holds a fraction of the yearly total (n / total_by_year), not a 0-100 value.
platform <- platform %>%
  left_join(platform_by_year, by = "year_released") %>%
  mutate(percent = n / total_by_year) %>%
  select(-n, -total_by_year)

# Animated pie chart with one frame per release year
plot_ly(
  platform,
  type = "pie",
  labels = ~platform,
  values = ~percent,
  frame = ~year_released,
  textinfo = "label+percent"
)
# Disabled draft of a circular (polar-coordinate) bar plot.
# Only `empty_bar` is live; in the visible script it is referenced solely by
# the commented-out `to_add` line below. The draft operates on a data frame
# `a` (with `value` and `individual` columns) that is not defined in the
# visible part of this script, so it cannot be re-enabled as-is.
empty_bar <- 10
 
# Add lines to the initial dataset
# to_add <- matrix(NA, empty_bar, ncol(a))
# colnames(to_add) <- colnames(a)
# a <- rbind(a, to_add)
# a$id <- seq(1, nrow(a))
#  
# # Get the name and the y position of each label
# label_data <- a
# number_of_bar <- nrow(label_data)
# angle <- 90 - 360 * (label_data$id-0.5) /number_of_bar     # I subtract 0.5 because the letter must have the angle of the center of the bars. Not extreme right(1) or extreme left (0)
# label_data$hjust <- ifelse( angle < -90, 1, 0)
# label_data$angle <- ifelse(angle < -90, angle+180, angle)
#  
# # Make the plot
# p <- ggplot(a, aes(x=as.factor(id), y=value)) +       # Note that id is a factor. If x is numeric, there is some space between the first bar
#   geom_bar(stat="identity", fill=alpha("green", 0.3)) +
#   ylim(-100,120) +
#   theme_minimal() +
#   theme(
#     axis.text = element_blank(),
#     axis.title = element_blank(),
#     panel.grid = element_blank(),
#     plot.margin = unit(rep(-1,4), "cm") 
#   ) +
#   coord_polar(start = 0) + 
#   geom_text(data=label_data, aes(x=id, y=value+10, label=individual, hjust=hjust), color="black", fontface="bold",alpha=0.6, size=2.5, angle= label_data$angle, inherit.aes = FALSE ) 
#  
# p
# Correlation between Metacritic score and user score ----
Corr <- with(games, cor(meta_score, user_score))
Corr
## [1] 0.5345749

# Distribution of user and Metacritic scores (default binning)
ggplot(games, aes(x = user_score)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggplot(games, aes(x = meta_score)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#

# Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.